{% extends "base.html" %}

{% set active_page = 'benchmark-results' %}

{% block title %}Benchmark Results History - Deep Research System{% endblock %}

{% block extra_head %}
<meta name="csrf-token" content="{{ csrf_token() }}">
<meta name="app-version" content="{{ version }}">
<style>
/* Outer wrapper: all card chrome is removed so the individual run cards
   below provide the visible surfaces. */
.benchmark-results-card {
    width: 100%;
    margin: 0;
    padding: 0;
    background: transparent;
    border: none;
    box-shadow: none;
}

.card-content {
    padding: 0;
}

/* One card per benchmark run. The whole card is the click target that
   expands/collapses the run's details, hence the pointer cursor. */
.run-card {
    background: #1a1a1a;
    border: 1px solid #333;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 20px;
    cursor: pointer;
    transition: border-color 0.2s, background-color 0.2s;
}

.run-card:hover {
    border-color: var(--primary-color);
    background: #1e1e1e;
}

/* `.expanded` is toggled by the page script while a run's details are open. */
.run-card.expanded {
    border-color: var(--primary-color);
}

/* Card header row: run name on the left, creation date/time on the right. */
.run-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 10px;
}

.run-title {
    font-size: 1.2rem;
    font-weight: bold;
    color: #e0e0e0;
}

.run-date {
    color: #a0a0a0;
    font-size: 0.9rem;
}

/* Grid of summary tiles (status, progress, accuracy, model, ...) shown on
   every run card. `auto-fit` lets the column count follow the card width. */
.run-summary {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
    gap: 12px;
    margin-bottom: 15px;
}

/* Responsive grid adjustments for more columns on wider screens */
/* Each breakpoint raises the minimum tile width and the gap a little. */
@media (min-width: 768px) {
    .run-summary {
        grid-template-columns: repeat(auto-fit, minmax(130px, 1fr));
        gap: 15px;
    }
}

@media (min-width: 1200px) {
    .run-summary {
        grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
        gap: 16px;
    }
}

@media (min-width: 1600px) {
    .run-summary {
        grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
        gap: 18px;
    }
}

@media (min-width: 1920px) {
    .run-summary {
        grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
        gap: 20px;
    }
}

/* A single tile: large value on top, small muted label underneath. */
.summary-item {
    text-align: center;
    padding: 10px;
    background: #2a2a2a;
    border-radius: 6px;
}

/* NOTE: the indicator rules further down share this specificity and come
   later in the sheet, so they override font-size/colour when both classes
   are on the same element. Keep that source order. */
.summary-value {
    font-size: 1.4rem;
    font-weight: bold;
    color: var(--primary-color);
}

.summary-label {
    font-size: 0.85rem;
    color: #a0a0a0;
    margin-top: 5px;
}

/* Pill used for the accuracy value of a completed run. */
.accuracy-indicator {
    display: inline-block;
    padding: 4px 8px;
    border-radius: 12px;
    font-size: 0.85rem;
    font-weight: bold;
}

/* Accuracy colour bands. The page script picks the class:
   >= 90 -> high, >= 70 -> medium, otherwise low. */
.accuracy-high {
    background: #1e3a1e;
    color: #4caf50;
}

.accuracy-medium {
    background: #3a2a1e;
    color: #ff9800;
}

.accuracy-low {
    background: #3a1e1e;
    color: #f44336;
}

/* Pill used for the run status. Declarations are identical to
   .accuracy-indicator above. */
.status-indicator {
    display: inline-block;
    padding: 4px 8px;
    border-radius: 12px;
    font-size: 0.85rem;
    font-weight: bold;
}

/* Status colours. The class name is built by the page script as
   "status-" + the run status with "_" turned into "-"
   (e.g. in_progress -> status-in-progress). */
.status-completed {
    background: #1e3a1e;
    color: #4caf50;
}

.status-in-progress {
    background: #1e2a3a;
    color: #2196f3;
}

.status-failed {
    background: #3a1e1e;
    color: #f44336;
}

.status-cancelled {
    background: #2a2a2a;
    color: #999;
}

/* Configuration panel inside the expanded section of a run card
   (iterations, questions per iteration, temperature, provider). */
.run-config {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 10px;
    margin-bottom: 15px;
    padding: 15px;
    background: #242424;
    border-radius: 6px;
}

/* One label/value row: label left, value right, thin separator below. */
.config-item {
    display: flex;
    justify-content: space-between;
    padding: 5px 0;
    border-bottom: 1px solid #333;
}

.config-label {
    color: #a0a0a0;
    font-size: 0.9rem;
}

.config-value {
    color: #e0e0e0;
    font-weight: 500;
}

/* Collapsible details area of a run card: hidden by default and shown
   only while the page script has added the `visible` class. */
.results-section {
    margin-top: 20px;
    display: none;
}

.results-section.visible {
    display: block;
}

/* Vertical stack of per-question result cards inside an expanded run. */
.examples-grid {
    display: grid;
    gap: 15px;
    margin-top: 15px;
}

.example-card {
    background: #1a1a1a;
    border: 1px solid #333;
    border-radius: 6px;
    padding: 15px;
}

/* Coloured left edge signals whether the answer was graded correct. */
.example-card.correct {
    border-left: 4px solid #4caf50;
}

.example-card.incorrect {
    border-left: 4px solid #f44336;
}

/* Header row of an example card: dataset badge, status and metrics. */
.example-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 10px;
}

/* Icon + "Correct"/"Incorrect" text. */
.example-status {
    display: flex;
    align-items: center;
    gap: 5px;
    font-weight: 600;
}

.example-status.correct {
    color: #4caf50;
}

.example-status.incorrect {
    color: #f44336;
}

/* The benchmark question, set apart with an accent bar. */
.example-question {
    background: #2a2a2a;
    padding: 12px;
    border-radius: 4px;
    border-left: 4px solid var(--primary-color);
    margin-bottom: 12px;
    color: #e0e0e0;
}

/* Two equal columns: model answer on the left, expected answer on the right. */
.example-answers {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 12px;
}

.answer-section {
    padding: 10px;
    border-radius: 4px;
    font-size: 0.9rem;
    line-height: 1.4;
}

/* Blue tint for the model's answer ... */
.model-answer-section {
    background: #1e2a3a;
    border-left: 3px solid #2196f3;
}

/* ... green tint for the expected answer. */
.correct-answer-section {
    background: #1e3a1e;
    border-left: 3px solid #4caf50;
}

.answer-label {
    font-size: 0.75rem;
    font-weight: 600;
    color: #a0a0a0;
    text-transform: uppercase;
    margin-bottom: 6px;
}

.answer-text {
    color: #e0e0e0;
}

/* Placeholder shown for empty lists and for load errors. */
.no-results {
    text-align: center;
    color: #a0a0a0;
    padding: 40px;
    font-style: italic;
}

/* "Click to view detailed results" hint at the bottom of a run card. */
.expand-indicator {
    color: #a0a0a0;
    font-size: 0.9rem;
    margin-top: 10px;
    text-align: center;
}

/* Per-dataset accuracy tiles.
   NOTE(review): no markup in the visible part of this template uses the
   .dataset-* classes below; confirm they are still used before changing
   or removing them. */
.dataset-breakdown {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
    gap: 10px;
    margin-top: 10px;
}

.dataset-item {
    text-align: center;
    padding: 8px;
    background: #2a2a2a;
    border-radius: 4px;
}

.dataset-name {
    font-size: 0.8rem;
    color: #a0a0a0;
    margin-bottom: 4px;
}

.dataset-accuracy {
    font-weight: bold;
    color: var(--primary-color);
}

/* Loading placeholder (initial page load and per-run details). */
.loading {
    text-align: center;
    padding: 40px;
    color: #a0a0a0;
}

/* Pager below the results list. The page script switches its inline
   `display` between `none` and `flex`. */
.pagination {
    display: flex;
    justify-content: center;
    align-items: center;
    gap: 10px;
    margin-top: 30px;
}

.pagination button {
    padding: 8px 12px;
    background: #2a2a2a;
    border: 1px solid #333;
    border-radius: 4px;
    color: #e0e0e0;
    cursor: pointer;
}

/* Highlight only buttons that can actually be clicked; a disabled
   Previous/Next button must not light up on hover. Same pattern as
   `.delete-btn:hover:not(:disabled)` elsewhere in this sheet. */
.pagination button:hover:not(:disabled) {
    background: var(--primary-color);
}

.pagination button:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}

/* Destructive action button on a run card (Delete / Cancel & Delete).
   The colour declarations use !important, presumably to win over the
   shared .btn/.btn-outline styles the button also carries — those are
   defined outside this template, so confirm before relaxing them. */
.delete-btn {
    background: #3a1e1e !important;
    border-color: #f44336 !important;
    color: #f44336 !important;
    font-size: 0.8rem;
    padding: 4px 8px;
    transition: all 0.2s;
}

/* Solid red on hover, but only while the button is enabled. */
.delete-btn:hover:not(:disabled) {
    background: #f44336 !important;
    color: white !important;
}

/* Greyed-out look for a disabled delete button. */
.delete-btn:disabled {
    background: #2a2a2a !important;
    border-color: #555 !important;
    color: #888 !important;
    cursor: not-allowed;
}

/* Filter bar above the results list.
   NOTE: the media queries below cover every viewport width (max 767px and
   min 768px), so the column template and gap declared here are effectively
   always overridden; only `display` and `margin-bottom` take effect. */
.filters {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
    gap: 15px;
    margin-bottom: 20px;
}

/* Responsive filter adjustments for full-width layout */
/* Narrow screens: fixed two-column layout. */
@media (max-width: 767px) {
    .filters {
        grid-template-columns: 1fr 1fr;
        gap: 12px;
    }
}

/* Wider screens: as many columns as fit, with growing minimum widths. */
@media (min-width: 768px) {
    .filters {
        grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
        gap: 16px;
    }
}

@media (min-width: 1200px) {
    .filters {
        grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
        gap: 18px;
    }
}

@media (min-width: 1600px) {
    .filters {
        grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
        gap: 20px;
    }
}

/* Small badge showing how long a single question took (example cards). */
.processing-time {
    background: #2a3f2a;
    color: #90ee90;
    padding: 2px 6px;
    border-radius: 3px;
    font-size: 0.8rem;
    font-weight: 500;
    white-space: nowrap;
}

/* One filter: label stacked above its control(s). */
.filter-group {
    display: flex;
    flex-direction: column;
    gap: 5px;
}

.filter-group label {
    font-size: 0.85rem;
    color: #a0a0a0;
}

.filter-group select,
.filter-group input {
    padding: 6px 10px;
    background: #2a2a2a;
    border: 1px solid #333;
    border-radius: 4px;
    color: #e0e0e0;
}

/* "Search Results Statistics" panel at the top of an expanded run. */
.search-stats-section {
    margin-bottom: 20px;
    padding: 15px;
    background: #1e1e1e;
    border: 1px solid #333;
    border-radius: 6px;
}

/* Heading with a leading icon, used inside the expanded run details. */
.section-title {
    display: flex;
    align-items: center;
    gap: 8px;
    margin-bottom: 15px;
    color: #e0e0e0;
    font-size: 1.1rem;
    font-weight: 600;
}

/* Tiles for average / min / max search results and the query count. */
.search-stats-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
    gap: 15px;
}

.stat-item {
    text-align: center;
    padding: 12px;
    background: #2a2a2a;
    border-radius: 6px;
}

.stat-value {
    font-size: 1.4rem;
    font-weight: bold;
    color: var(--primary-color);
    margin-bottom: 5px;
}

.stat-label {
    font-size: 0.85rem;
    color: #a0a0a0;
}

/* Thin rule between the statistics panel and the example list. */
.results-divider {
    height: 1px;
    background: #333;
    margin: 20px 0;
}

.examples-section {
    margin-top: 15px;
}

/* Right-hand cluster of badges in an example card header. */
.example-metrics {
    display: flex;
    gap: 10px;
    align-items: center;
}

/* Badge showing how many search results a single question produced. */
.search-results-count {
    background: #2a3a3a;
    color: #81c784;
    padding: 2px 6px;
    border-radius: 3px;
    font-size: 0.8rem;
    font-weight: 500;
    white-space: nowrap;
}
</style>
{% endblock %}

{% block content %}
<div class="page active" id="benchmark-results">
    <div class="page-header">
        <h1>Benchmark Results History</h1>
        <p class="page-subtitle">Compare accuracy across different models, search engines, and strategies</p>
    </div>

    <div class="card benchmark-results-card">
        <div class="card-content">
            <!-- Filters: every control is read by applyFilters() via its id -->
            <div class="filters">
                <div class="filter-group">
                    <label for="accuracy-filter">Accuracy Range</label>
                    <select id="accuracy-filter">
                        <option value="">All</option>
                        <option value="high">90%+ (High)</option>
                        <option value="medium">70-90% (Medium)</option>
                        <option value="low">&lt;70% (Low)</option>
                    </select>
                </div>
                <div class="filter-group">
                    <label for="model-filter">Model</label>
                    <!-- Further options are appended by populateFilters() -->
                    <select id="model-filter">
                        <option value="">All Models</option>
                    </select>
                </div>
                <div class="filter-group">
                    <label for="strategy-filter">Strategy</label>
                    <!-- Further options are appended by populateFilters() -->
                    <select id="strategy-filter">
                        <option value="">All Strategies</option>
                    </select>
                </div>
                <div class="filter-group">
                    <label for="status-filter">Status</label>
                    <select id="status-filter">
                        <option value="">All Statuses</option>
                        <option value="completed">Completed</option>
                        <option value="in_progress">In Progress</option>
                        <option value="failed">Failed</option>
                        <option value="cancelled">Cancelled</option>
                    </select>
                </div>
                <div class="filter-group">
                    <!-- The visible label targets the first date field; each field
                         also carries its own accessible name so the pair is
                         distinguishable to assistive technology. -->
                    <label for="date-from">Date Range</label>
                    <input type="date" id="date-from" aria-label="Date range start">
                    <input type="date" id="date-to" aria-label="Date range end">
                </div>
            </div>

            <!-- Results List: content is replaced by displayResults() -->
            <div id="results-container">
                <div class="loading">
                    <i class="fas fa-spinner fa-spin" aria-hidden="true"></i> Loading benchmark results...
                </div>
            </div>

            <!-- Pagination: the inline display value is toggled by the page script -->
            <div class="pagination" id="pagination" style="display: none;">
                <button type="button" id="prev-page">← Previous</button>
                <span id="page-info">Page 1 of 1</span>
                <button type="button" id="next-page">Next →</button>
            </div>
        </div>
    </div>
</div>

<script>
// Page-level state shared by the functions in this script.
let benchmarkRuns = [];   // every run returned by the history endpoint
let filteredRuns = [];    // the runs that pass the currently selected filters
let currentPage = 1;      // 1-based index of the page being shown
const itemsPerPage = 20;  // number of run cards rendered per page

// Once the DOM is parsed: start loading the history, then wire up the filters.
document.addEventListener('DOMContentLoaded', () => {
    loadBenchmarkHistory();
    setupFilters();
});

/**
 * Fetch the benchmark run history and render it.
 *
 * On success the module-level `benchmarkRuns` / `filteredRuns` arrays are
 * replaced, the filter dropdowns are populated and the first page is drawn.
 * On any failure (network error, non-2xx response, invalid JSON, or a
 * payload whose `success` flag is falsy) an error placeholder is shown
 * instead and the pager is hidden.
 *
 * @returns {Promise<void>} Resolves when rendering is done; never rejects.
 */
async function loadBenchmarkHistory() {
    // Single place for the failure UI so both error paths stay identical.
    const showLoadError = () => {
        document.getElementById('results-container').innerHTML =
            '<div class="no-results">Error loading benchmark results</div>';
        // A pager left over from an earlier successful load would be misleading.
        const pagination = document.getElementById('pagination');
        if (pagination) {
            pagination.style.display = 'none';
        }
    };

    try {
        const response = await fetch('/benchmark/api/history');
        if (!response.ok) {
            // Report the real cause instead of a JSON parse error on an HTML error page.
            throw new Error(`History request failed with HTTP ${response.status}`);
        }

        const data = await response.json();
        if (!data.success) {
            showLoadError();
            return;
        }

        // Tolerate a successful payload without a `runs` array.
        benchmarkRuns = Array.isArray(data.runs) ? data.runs : [];
        filteredRuns = [...benchmarkRuns];
        populateFilters();
        displayResults();
    } catch (error) {
        console.error('Error loading benchmark history:', error);
        showLoadError();
    }
}

/**
 * Fill the model and strategy dropdowns with the distinct values found in
 * `benchmarkRuns`.
 *
 * Safe to call repeatedly: options added by a previous call are removed
 * first (the static first "All ..." option is kept) and the user's current
 * selection is restored when it still exists, so reloading the history does
 * not accumulate duplicate entries.
 */
function populateFilters() {
    // Distinct, truthy values of one search_config field across all runs.
    const distinctValues = (pick) =>
        [...new Set(benchmarkRuns.map(pick).filter(Boolean))];

    // Rebuild the dynamic options of one <select>.
    const fillSelect = (selectId, values, toLabel) => {
        const select = document.getElementById(selectId);
        const previousValue = select.value;

        // Index 0 is the static "All ..." option from the markup.
        while (select.options.length > 1) {
            select.remove(1);
        }

        values.forEach(value => {
            const option = document.createElement('option');
            option.value = value;
            option.textContent = toLabel(value);
            select.appendChild(option);
        });

        // Keep the selection if that value survived, else fall back to "All".
        select.value = previousValue;
        if (select.selectedIndex === -1) {
            select.selectedIndex = 0;
        }
    };

    // Populate model filter
    fillSelect(
        'model-filter',
        distinctValues(run => run.search_config?.model_name),
        model => model
    );

    // Populate strategy filter ("source_based" is shown as "Source Based")
    fillSelect(
        'strategy-filter',
        distinctValues(run => run.search_config?.search_strategy),
        strategy => String(strategy).replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase())
    );
}

/**
 * Re-run the filtering whenever any of the filter controls changes.
 */
function setupFilters() {
    // Ids of every control whose value applyFilters() reads.
    const filterControlIds = [
        'accuracy-filter',
        'model-filter',
        'strategy-filter',
        'status-filter',
        'date-from',
        'date-to',
    ];

    for (const controlId of filterControlIds) {
        document.getElementById(controlId).addEventListener('change', applyFilters);
    }
}

/**
 * Recompute `filteredRuns` from `benchmarkRuns` using the current values of
 * the filter controls, reset to the first page and re-render.
 *
 * Date handling: the date inputs yield plain `YYYY-MM-DD` strings, and the
 * run cards show the creation date in the browser's local time zone, so the
 * comparison is done on the run's *local* calendar date. Runs whose
 * `created_at` is missing or unparseable are excluded while a date bound is
 * active, and never make the filter throw.
 */
function applyFilters() {
    const valueOf = (id) => document.getElementById(id).value;

    const accuracyFilter = valueOf('accuracy-filter');
    const modelFilter = valueOf('model-filter');
    const strategyFilter = valueOf('strategy-filter');
    const statusFilter = valueOf('status-filter');
    const dateFrom = valueOf('date-from');
    const dateTo = valueOf('date-to');

    // Local calendar date as YYYY-MM-DD, or null when the timestamp is unusable.
    const toLocalDateKey = (timestamp) => {
        if (timestamp === undefined || timestamp === null) return null;
        const parsed = new Date(timestamp);
        if (Number.isNaN(parsed.getTime())) return null;
        const pad = (n) => String(n).padStart(2, '0');
        return `${parsed.getFullYear()}-${pad(parsed.getMonth() + 1)}-${pad(parsed.getDate())}`;
    };

    filteredRuns = benchmarkRuns.filter(run => {
        // Accuracy filter (a missing accuracy counts as 0, i.e. "low")
        if (accuracyFilter) {
            const accuracy = run.overall_accuracy || 0;
            if (accuracyFilter === 'high' && accuracy < 90) return false;
            if (accuracyFilter === 'medium' && (accuracy < 70 || accuracy >= 90)) return false;
            if (accuracyFilter === 'low' && accuracy >= 70) return false;
        }

        // Model filter
        if (modelFilter && run.search_config?.model_name !== modelFilter) return false;

        // Strategy filter
        if (strategyFilter && run.search_config?.search_strategy !== strategyFilter) return false;

        // Status filter
        if (statusFilter && run.status !== statusFilter) return false;

        // Date filters — only touch created_at when a bound is actually set.
        if (dateFrom || dateTo) {
            const runDate = toLocalDateKey(run.created_at);
            if (runDate === null) return false;
            // YYYY-MM-DD strings sort chronologically, so string comparison is enough.
            if (dateFrom && runDate < dateFrom) return false;
            if (dateTo && runDate > dateTo) return false;
        }

        return true;
    });

    currentPage = 1;
    displayResults();
}

/**
 * Render the current page of `filteredRuns` into the results container and
 * refresh the pager.
 *
 * `currentPage` is clamped to the valid range first, so a list that has
 * shrunk since the page was chosen (for example after a run was removed)
 * shows its last page instead of an empty container.
 */
function displayResults() {
    const container = document.getElementById('results-container');

    if (filteredRuns.length === 0) {
        container.innerHTML = '<div class="no-results">No benchmark results found</div>';
        document.getElementById('pagination').style.display = 'none';
        return;
    }

    // Keep the page index inside 1..totalPages.
    const totalPages = Math.ceil(filteredRuns.length / itemsPerPage);
    currentPage = Math.min(Math.max(currentPage, 1), totalPages);

    // slice() stops at the end of the array, so no explicit upper bound is needed.
    const startIndex = (currentPage - 1) * itemsPerPage;
    const pageRuns = filteredRuns.slice(startIndex, startIndex + itemsPerPage);

    container.innerHTML = pageRuns.map(run => createRunCard(run)).join('');

    // Setup pagination
    setupPagination();
}

/**
 * Build the HTML for one benchmark run card.
 *
 * The returned markup is assigned to `innerHTML` by the caller, so every
 * value that originates from the server (run name, model, strategy, search
 * tool, status, ids, config values) is HTML-escaped before interpolation.
 *
 * @param {Object} run - Run record from the history endpoint. Fields read:
 *   `id`, `run_name`, `status`, `created_at`, `overall_accuracy`,
 *   `completed_examples`, `total_examples` and `search_config`
 *   (`model_name`, `search_strategy`, `search_tool`, `iterations`,
 *   `questions_per_iteration`, `temperature`, `provider`).
 * @returns {string} HTML string for the card, including the initially
 *   hidden details section (`#results-<id>` / `#examples-<id>`).
 */
function createRunCard(run) {
    // Escape text for use in HTML text nodes and double-quoted attributes.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    }[ch]));

    // Config values: only truly missing values become 'N/A', so a legitimate
    // 0 (e.g. temperature 0) is still displayed.
    const displayValue = (value) =>
        (value === undefined || value === null || value === '') ? 'N/A' : escapeHtml(value);

    const config = run.search_config || {};

    const accuracy = Number(run.overall_accuracy) || 0;
    const accuracyClass = accuracy >= 90 ? 'accuracy-high' :
                         accuracy >= 70 ? 'accuracy-medium' : 'accuracy-low';

    // Status handling: every underscore is replaced, and the CSS class is
    // restricted to characters that are safe inside a class attribute.
    const status = String(run.status || 'unknown');
    const statusClass = `status-${status.replace(/_/g, '-').replace(/[^a-zA-Z0-9-]/g, '')}`;
    const formattedStatus = escapeHtml(
        status.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase())
    );

    const createdAt = new Date(run.created_at);
    const date = createdAt.toLocaleDateString();
    const time = createdAt.toLocaleTimeString();

    // Progress calculation (missing counters are shown as 0 rather than "undefined")
    const completedExamples = run.completed_examples ?? 0;
    const totalExamples = run.total_examples ?? 0;
    const progress = totalExamples > 0 ?
        ((completedExamples / totalExamples) * 100).toFixed(1) : 0;

    // Accuracy is only meaningful for completed runs with a positive score.
    const showAccuracy = status === 'completed' && accuracy > 0;
    const accuracyClasses = showAccuracy ? `${accuracyClass} accuracy-indicator` : '';
    const accuracyText = showAccuracy ? `${accuracy.toFixed(1)}%` : 'N/A';

    // The id is used twice: as a JavaScript literal inside onclick handlers
    // (JSON-encoded so string ids work too) and as part of element ids.
    const idArg = escapeHtml(JSON.stringify(run.id));
    const idAttr = escapeHtml(run.id);

    // Download is offered for completed runs only.
    const downloadButton = status === 'completed' ?
        `<button type="button" class="btn btn-outline btn-sm" onclick="event.stopPropagation(); downloadBenchmarkYAML(${idArg})" style="background: #1e3a1e !important; border-color: #4caf50 !important; color: #4caf50 !important;">
                                <i class="fas fa-download" aria-hidden="true"></i> YAML
                            </button>` : '';

    // Running benchmarks must be cancelled before they can be deleted.
    // Both variants rely on the .delete-btn rules for their colours, so the
    // hover state works on either button.
    const removeButton = status === 'in_progress' ?
        `<button type="button" class="btn btn-outline btn-sm delete-btn" onclick="event.stopPropagation(); cancelAndDeleteBenchmarkRun(${idArg})">
                                <i class="fas fa-stop" aria-hidden="true"></i> Cancel &amp; Delete
                            </button>` :
        `<button type="button" class="btn btn-outline btn-sm delete-btn" onclick="event.stopPropagation(); deleteBenchmarkRun(${idArg})">
                                <i class="fas fa-trash" aria-hidden="true"></i> Delete
                            </button>`;

    return `
        <div class="run-card" onclick="toggleRunDetails(${idArg})">
            <div class="run-header">
                <div class="run-title">${escapeHtml(run.run_name ?? '')}</div>
                <div class="run-date">${date} ${time}</div>
            </div>

            <div class="run-summary">
                <div class="summary-item">
                    <div class="summary-value ${statusClass} status-indicator">${formattedStatus}</div>
                    <div class="summary-label">Status</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${escapeHtml(completedExamples)}/${escapeHtml(totalExamples)}</div>
                    <div class="summary-label">Progress (${progress}%)</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value ${accuracyClasses}">${accuracyText}</div>
                    <div class="summary-label">Accuracy</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${escapeHtml(config.model_name || 'Unknown')}</div>
                    <div class="summary-label">Model</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${escapeHtml(String(config.search_strategy || 'unknown').replace(/_/g, ' '))}</div>
                    <div class="summary-label">Strategy</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${escapeHtml(config.search_tool || 'Unknown')}</div>
                    <div class="summary-label">Search Engine</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${formatAvgSearchResults(run)}</div>
                    <div class="summary-label">Avg Search Results</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${formatAvgSearchRequests(run)}</div>
                    <div class="summary-label">Avg Search Requests</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${formatAvgProcessingTime(run)}</div>
                    <div class="summary-label">Avg Time/Question</div>
                </div>
                <div class="summary-item">
                    <div style="display: flex; gap: 8px;">
                        ${downloadButton}
                        ${removeButton}
                    </div>
                    <div class="summary-label">Actions</div>
                </div>
            </div>

            <div class="expand-indicator">
                <i class="fas fa-chevron-down" aria-hidden="true"></i> Click to view detailed results and examples
            </div>

            <div class="results-section" id="results-${idAttr}">
                <div class="run-config">
                    <div class="config-item">
                        <span class="config-label">Iterations:</span>
                        <span class="config-value">${displayValue(config.iterations)}</span>
                    </div>
                    <div class="config-item">
                        <span class="config-label">Questions/Iteration:</span>
                        <span class="config-value">${displayValue(config.questions_per_iteration)}</span>
                    </div>
                    <div class="config-item">
                        <span class="config-label">Temperature:</span>
                        <span class="config-value">${displayValue(config.temperature)}</span>
                    </div>
                    <div class="config-item">
                        <span class="config-label">Provider:</span>
                        <span class="config-value">${displayValue(config.provider)}</span>
                    </div>
                </div>

                <div id="examples-${idAttr}">
                    <div class="loading">Loading detailed results...</div>
                </div>
            </div>
        </div>
    `;
}

/**
 * Expand or collapse the details section of a run card, loading the run's
 * example results the first time it is opened.
 *
 * Does nothing when the card no longer exists (for example because the list
 * was re-rendered). "Not loaded yet" is detected structurally, by the
 * `.loading` placeholder still being a direct child of the examples
 * container, rather than by searching the rendered HTML for the placeholder
 * text, which a loaded question or answer could also contain. A flag on the
 * container prevents a second fetch while one is already running.
 *
 * @param {number|string} runId - Id of the run whose card was clicked.
 * @returns {Promise<void>}
 */
async function toggleRunDetails(runId) {
    const resultsSection = document.getElementById(`results-${runId}`);
    if (!resultsSection) {
        return;
    }
    const runCard = resultsSection.closest('.run-card');

    const isOpening = !resultsSection.classList.contains('visible');
    resultsSection.classList.toggle('visible', isOpening);
    runCard?.classList.toggle('expanded', isOpening);
    if (!isOpening) {
        return;
    }

    // Load examples if not already loaded
    const examplesContainer = document.getElementById(`examples-${runId}`);
    if (!examplesContainer) {
        return;
    }

    const hasPlaceholder = Array.from(examplesContainer.children)
        .some(child => child.classList.contains('loading'));
    if (!hasPlaceholder || examplesContainer.dataset.loading === 'true') {
        return;
    }

    // Guard against overlapping requests when the card is toggled quickly.
    examplesContainer.dataset.loading = 'true';
    try {
        await loadExamples(runId);
    } finally {
        delete examplesContainer.dataset.loading;
    }
}

/**
 * Fetch up to 50 individual results of a run and render them, preceded by a
 * small statistics panel, into `#examples-<runId>`.
 *
 * The container is looked up again after the request completes: the run
 * list may have been re-rendered in the meantime, in which case the response
 * is dropped silently instead of throwing from the error handler.
 *
 * @param {number|string} runId - Id of the run to load results for.
 * @returns {Promise<void>} Resolves when rendering is done; never rejects.
 */
async function loadExamples(runId) {
    // Show a placeholder message if the container still exists.
    const showMessage = (message) => {
        const target = document.getElementById(`examples-${runId}`);
        if (target) {
            target.innerHTML = `<div class="no-results">${message}</div>`;
        }
    };

    try {
        const response = await fetch(`/benchmark/api/results/${encodeURIComponent(runId)}?limit=50`);
        if (!response.ok) {
            throw new Error(`Results request failed with HTTP ${response.status}`);
        }
        const data = await response.json();

        const examplesContainer = document.getElementById(`examples-${runId}`);
        if (!examplesContainer) {
            // Card was removed or re-rendered while the request was in flight.
            return;
        }

        const results = data.success && Array.isArray(data.results) ? data.results : [];
        if (results.length === 0) {
            showMessage('No detailed results available');
            return;
        }

        // Calculate search result statistics (results is non-empty here, so
        // the average and Math.min/Math.max are always well defined).
        const searchResultCounts = results.map(r => Number(r.search_result_count) || 0);
        const totalSearchResults = searchResultCounts.reduce((sum, count) => sum + count, 0);
        const avgSearchResults = (totalSearchResults / searchResultCounts.length).toFixed(1);
        const minSearchResults = Math.min(...searchResultCounts);
        const maxSearchResults = Math.max(...searchResultCounts);

        // Create search results statistics section
        const statsHtml = `
                <div class="search-stats-section">
                    <h4 class="section-title">
                        <i class="fas fa-search" aria-hidden="true"></i> Search Results Statistics
                    </h4>
                    <div class="search-stats-grid">
                        <div class="stat-item">
                            <div class="stat-value">${avgSearchResults}</div>
                            <div class="stat-label">Avg Search Results</div>
                        </div>
                        <div class="stat-item">
                            <div class="stat-value">${minSearchResults}</div>
                            <div class="stat-label">Min Results</div>
                        </div>
                        <div class="stat-item">
                            <div class="stat-value">${maxSearchResults}</div>
                            <div class="stat-label">Max Results</div>
                        </div>
                        <div class="stat-item">
                            <div class="stat-value">${results.length}</div>
                            <div class="stat-label">Total Queries</div>
                        </div>
                    </div>
                </div>
            `;

        // Create examples grid
        const examplesHtml = results.map(result => createExampleCard(result)).join('');

        examplesContainer.innerHTML = `
                ${statsHtml}
                <div class="results-divider"></div>
                <div class="examples-section">
                    <h4 class="section-title">
                        <i class="fas fa-list" aria-hidden="true"></i> Individual Query Results (${results.length} shown)
                    </h4>
                    <div class="examples-grid">${examplesHtml}</div>
                </div>
            `;
    } catch (error) {
        console.error('Error loading examples:', error);
        showMessage('Error loading examples');
    }
}

/**
 * Format a run's average number of search results for the summary tile.
 *
 * @param {Object} run - Run record; reads `avg_search_results`.
 * @returns {string} The average rounded to a whole number, or 'N/A' when
 *   the value is missing, zero or negative.
 */
function formatAvgSearchResults(run) {
    const average = run.avg_search_results;
    const isMissing = !average;
    const isNonPositive = average <= 0;

    return (isMissing || isNonPositive) ? 'N/A' : `${Math.round(average)}`;
}

/**
 * Format a run's search request count for the summary tile.
 *
 * NOTE(review): the tile is labelled "Avg Search Requests" but this reads
 * `total_search_requests` — confirm which figure the backend puts there.
 *
 * @param {Object} run - Run record; reads `total_search_requests`.
 * @returns {string} The value rounded to a whole number, or 'N/A' when it
 *   is missing, zero or negative.
 */
function formatAvgSearchRequests(run) {
    const requests = run.total_search_requests;
    const isMissing = !requests;
    const isNonPositive = requests <= 0;

    return (isMissing || isNonPositive) ? 'N/A' : `${Math.round(requests)}`;
}

/**
 * Format the average per-question processing time of a run.
 *
 * Uses `run.avg_processing_time` when it is set; otherwise averages the
 * positive `processing_time` values found in `run.results`.
 *
 * The duration is rounded as a whole *before* being split into units, so a
 * remainder can never round up to a full unit (the previous code could
 * produce "1m 60s", "1h 60m" or "60.0s").
 *
 * @param {Object} run - Run record; reads `avg_processing_time` (seconds)
 *   and, as a fallback, `results[].processing_time` (seconds).
 * @returns {string} 'N/A' when no positive time is available, otherwise
 *   "12.3s", "4m 5s" / "4m" or "1h 2m" / "1h".
 */
function formatAvgProcessingTime(run) {
    // Use avg_processing_time if available, otherwise calculate from results
    let avgTime = Number(run.avg_processing_time);
    if (!avgTime) {
        const timesWithValues = Array.isArray(run.results)
            ? run.results.map(r => Number(r.processing_time)).filter(time => time > 0)
            : [];

        if (timesWithValues.length === 0) return 'N/A';
        avgTime = timesWithValues.reduce((sum, time) => sum + time, 0) / timesWithValues.length;
    }

    if (!Number.isFinite(avgTime) || avgTime <= 0) return 'N/A';

    // Below one minute: one decimal place, unless rounding would show "60.0s".
    if (avgTime < 60 && Number(avgTime.toFixed(1)) < 60) {
        return `${avgTime.toFixed(1)}s`;
    }

    // Below one hour: round to whole seconds first, then split.
    const totalSeconds = Math.round(avgTime);
    if (totalSeconds < 3600) {
        const minutes = Math.floor(totalSeconds / 60);
        const seconds = totalSeconds % 60;
        return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
    }

    // One hour or more: round to whole minutes first, then split.
    const totalMinutes = Math.round(avgTime / 60);
    const hours = Math.floor(totalMinutes / 60);
    const minutes = totalMinutes % 60;
    return minutes > 0 ? `${hours}h ${minutes}m` : `${hours}h`;
}

/**
 * Build the HTML for one individual benchmark result.
 *
 * The question, the model answer, the expected answer and the dataset name
 * can contain arbitrary text (model output, web content), and the returned
 * markup is assigned to `innerHTML`, so all of them are HTML-escaped.
 *
 * @param {Object} result - Result record; reads `is_correct`,
 *   `dataset_type`, `processing_time` (seconds), `search_result_count`,
 *   `question`, `model_answer` and `correct_answer`.
 * @returns {string} HTML string for the example card.
 */
function createExampleCard(result) {
    // Escape text for use in HTML text nodes and double-quoted attributes.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    }[ch]));

    const statusClass = result.is_correct ? 'correct' : 'incorrect';
    const statusIcon = result.is_correct
        ? '<i class="fas fa-check-circle" aria-hidden="true"></i>'
        : '<i class="fas fa-times-circle" aria-hidden="true"></i>';
    const statusText = result.is_correct ? 'Correct' : 'Incorrect';

    // Format processing time for individual result. From one minute up the
    // duration is rounded to whole seconds before splitting, so the seconds
    // part can never read "60s".
    const seconds = Number(result.processing_time);
    let processingTime = 'N/A';
    if (seconds > 0) {
        if (seconds < 60 && Number(seconds.toFixed(1)) < 60) {
            processingTime = `${seconds.toFixed(1)}s`;
        } else {
            const wholeSeconds = Math.round(seconds);
            processingTime = `${Math.floor(wholeSeconds / 60)}m ${wholeSeconds % 60}s`;
        }
    }

    // Format search results count
    const searchResultCount = result.search_result_count || 0;

    return `
        <div class="example-card ${statusClass}">
            <div class="example-header">
                <span class="dataset-badge">${escapeHtml(result.dataset_type || 'unknown')}</span>
                <span class="example-status ${statusClass}">
                    ${statusIcon} ${statusText}
                </span>
                <div class="example-metrics">
                    <span class="processing-time">⏱️ ${processingTime}</span>
                    <span class="search-results-count">🔍 ${escapeHtml(searchResultCount)} results</span>
                </div>
            </div>

            <div class="example-question">
                <strong>Question:</strong> ${escapeHtml(result.question ?? '')}
            </div>

            <div class="example-answers">
                <div class="answer-section model-answer-section">
                    <div class="answer-label">Model Answer</div>
                    <div class="answer-text">${escapeHtml(result.model_answer || 'No answer provided')}</div>
                </div>
                <div class="answer-section correct-answer-section">
                    <div class="answer-label">Expected Answer</div>
                    <div class="answer-text">${escapeHtml(result.correct_answer || 'No expected answer')}</div>
                </div>
            </div>
        </div>
    `;
}

/**
 * Show, hide and wire up the pager for the current `filteredRuns`.
 *
 * The pager is hidden when everything fits on one page. Otherwise the
 * Previous/Next buttons are enabled according to `currentPage`, the
 * "Page X of Y" text is refreshed, and the click handlers are replaced so
 * they always close over the current page count.
 */
function setupPagination() {
    const pageCount = Math.ceil(filteredRuns.length / itemsPerPage);
    const pager = document.getElementById('pagination');

    // Nothing to page through.
    if (pageCount <= 1) {
        pager.style.display = 'none';
        return;
    }
    pager.style.display = 'flex';

    const previousButton = document.getElementById('prev-page');
    const nextButton = document.getElementById('next-page');
    const pageLabel = document.getElementById('page-info');

    previousButton.disabled = currentPage === 1;
    nextButton.disabled = currentPage === pageCount;
    pageLabel.textContent = `Page ${currentPage} of ${pageCount}`;

    // Switch to another page and redraw the list.
    const goToPage = (page) => {
        currentPage = page;
        displayResults();
    };

    previousButton.onclick = () => {
        if (currentPage > 1) goToPage(currentPage - 1);
    };
    nextButton.onclick = () => {
        if (currentPage < pageCount) goToPage(currentPage + 1);
    };
}

/**
 * Cancel a running benchmark and, once the cancellation was accepted,
 * delete it.
 *
 * Asks the user for confirmation first. The cancel request carries the CSRF
 * token published in the page's `csrf-token` meta tag (when present). A
 * response that is not valid JSON, or that lacks an `error` text, is
 * reported with its HTTP status instead of a parser message or "undefined".
 *
 * @param {number|string} runId - Id of the run to cancel and delete.
 * @returns {Promise<void>} Resolves when the flow finishes; errors are
 *   reported through `showAlert` and never rethrown.
 */
async function cancelAndDeleteBenchmarkRun(runId) {
    if (!confirm('Are you sure you want to cancel and delete this running benchmark? This action cannot be undone.')) {
        return;
    }

    try {
        const headers = { 'Content-Type': 'application/json' };
        // NOTE(review): assumes the backend accepts the token in the
        // X-CSRFToken header (the Flask-WTF default) — confirm. Sending it
        // is harmless if a global fetch wrapper already adds it.
        const csrfToken = document.querySelector('meta[name="csrf-token"]')?.content;
        if (csrfToken) {
            headers['X-CSRFToken'] = csrfToken;
        }

        // First cancel the benchmark
        const cancelResponse = await fetch(`/benchmark/api/cancel/${encodeURIComponent(runId)}`, {
            method: 'POST',
            headers
        });

        // The body may not be JSON (e.g. an HTML error page); in that case
        // fall through to the error branch and report the HTTP status.
        let cancelData = null;
        try {
            cancelData = await cancelResponse.json();
        } catch (parseError) {
            console.error('Cancel response was not valid JSON:', parseError);
        }

        if (cancelData?.success) {
            showAlert('Benchmark cancelled successfully. Deleting...', 'info');

            // Wait a moment for cancellation to process
            await new Promise(resolve => setTimeout(resolve, 1000));

            // Then delete it
            await deleteBenchmarkRun(runId);
        } else {
            const reason = cancelData?.error || `HTTP ${cancelResponse.status}`;
            showAlert('Error cancelling benchmark: ' + reason, 'error');
        }
    } catch (error) {
        console.error('Error cancelling benchmark:', error);
        showAlert('Error cancelling benchmark: ' + error.message, 'error');
    }
}

// Builds a YAML summary of one benchmark run and triggers a browser download.
//
// runId - id of the run to export; looked up in the in-memory benchmarkRuns list.
//
// Run-specific values (model, search engine, accuracy, ...) come from the run
// object. The LLM context/token settings are fetched from /settings/api at
// download time, so they may differ from what the run actually used; the
// generated file says so. Any failure is logged and reported via showAlert().
async function downloadBenchmarkYAML(runId) {
    try {
        // Find the run in our local data
        const run = benchmarkRuns.find(r => r.id === runId);
        if (!run) {
            showAlert('Benchmark run not found', 'error');
            return;
        }

        // Get current date for filename
        const date = new Date().toISOString().split('T')[0];

        // Get app version from meta tag
        const appVersion = document.querySelector('meta[name="app-version"]')?.content || 'Could not fetch version';

        // Extract model name and clean it for filename
        const modelName = run.search_config?.model_name || 'unknown-model';
        const cleanModelName = modelName.replace(/[^a-zA-Z0-9.-]/g, '-').toLowerCase();

        // Current LLM settings; these placeholders stay in place when the
        // settings request fails or returns an unexpected payload.
        let localContextWindow = 'Could not fetch';
        let maxTokens = 'Could not fetch';
        let contextWindowUnrestricted = 'Could not fetch';

        try {
            const settingsResponse = await fetch('/settings/api');
            if (settingsResponse.ok) {
                const data = await settingsResponse.json();
                if (data.status === 'success' && data.settings) {
                    const settings = data.settings;
                    // LLM settings - extract the 'value' property from each setting object
                    localContextWindow = settings['llm.local_context_window_size']?.value || 'Could not fetch';
                    maxTokens = settings['llm.max_tokens']?.value || 'Could not fetch';
                    contextWindowUnrestricted = settings['llm.context_window_unrestricted']?.value !== undefined ?
                        (settings['llm.context_window_unrestricted'].value ? 'Yes' : 'No') : 'Could not fetch';
                }
            }
        } catch (e) {
            // Best effort: the export is still useful without these values
            console.error('Could not fetch current settings:', e);
        }

        // Calculate average search results if available
        const avgSearchResults = formatAvgSearchResults(run).replace(' results', '');
        const searchResultsNum = avgSearchResults !== 'N/A' ? avgSearchResults : '# Please fill in';

        // Number of correct answers derived from the accuracy percentage.
        // A missing accuracy counts as 0, matching the percentage printed next to it.
        const correctCount = Math.round((run.overall_accuracy || 0) * run.total_examples / 100);

        // Use ?? rather than || so that a temperature of 0 is reported as 0
        const temperature = run.search_config?.temperature ?? 'N/A';

        // Generate YAML content
        const yamlContent = `# Benchmark Result
# Generated from Local Deep Research v${appVersion}
# Date: ${date}

# Model Information
model: ${modelName}
model_provider: ${run.search_config?.provider || 'unknown'}
quantization: # Please fill in if applicable

# Search Engine (critical for benchmark reproducibility)
search_engine: ${run.search_config?.search_tool || 'unknown'}
search_provider_version: # if known, e.g., "latest", "2024.1.0"
average_results_per_query: ${searchResultsNum}

# Hardware
hardware:
  gpu: # Please fill in
  ram: # Please fill in
  cpu: # Please fill in

# Benchmark Results
results:
  dataset: SimpleQA
  total_questions: ${run.total_examples}

  ${run.search_config?.search_strategy === 'focused_iteration' ? 'focused_iteration' : 'source_based'}:
    accuracy: ${run.overall_accuracy ? run.overall_accuracy.toFixed(1) : 0}% (${correctCount}/${run.total_examples})
    iterations: ${run.search_config?.iterations || 'N/A'}
    questions_per_iteration: ${run.search_config?.questions_per_iteration || 'N/A'}
    avg_time_per_question: ${formatAvgProcessingTime(run)}
    total_tokens_used: # if available

# Configuration
configuration:
  context_window: ${localContextWindow} # Current setting at download time - may differ from benchmark run
  temperature: ${temperature}
  max_tokens: ${maxTokens} # Current setting at download time
  local_provider_context_window_size: ${localContextWindow} # Current setting at download time
  context_window_unrestricted: ${contextWindowUnrestricted} # Current setting at download time

# Versions
versions:
  ldr_version: ${appVersion}
  ollama_version: # if applicable

# Test Details
test_details:
  date_tested: ${date}
  rate_limiting_issues: # yes/no
  search_failures: # number of failed searches, if any

# Notes
notes: |
  # Add any observations, errors, or insights here
  # Search strategy: ${run.search_config?.search_strategy || 'unknown'}
  # Provider: ${run.search_config?.provider || 'unknown'}
  # Note: Configuration values are from current settings at download time,
  # not necessarily the values used during the benchmark run
`;

        // Create blob and download through a temporary, hidden anchor element
        const blob = new Blob([yamlContent], { type: 'text/yaml' });
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.style.display = 'none';
        a.href = url;
        a.download = `${cleanModelName}_${date}.yaml`;
        document.body.appendChild(a);
        a.click();
        window.URL.revokeObjectURL(url);
        document.body.removeChild(a);

        showAlert('Benchmark YAML downloaded! Hardware details are optional but helpful for performance context.', 'success');
    } catch (error) {
        console.error('Error downloading YAML:', error);
        showAlert('Error downloading YAML: ' + error.message, 'error');
    }
}

// Deletes a benchmark run on the server and removes it from the local list.
//
// runId - identifier interpolated into the delete endpoint URL and matched
//         against run.id when pruning benchmarkRuns.
//
// On success the list is re-filtered and re-rendered through applyFilters().
// Failures are logged and reported via showAlert(); nothing is thrown.
async function deleteBenchmarkRun(runId) {
    try {
        // Forward the CSRF token exposed in the page's meta tag so the
        // state-changing DELETE is accepted when CSRF protection is enforced.
        const csrfToken = document.querySelector('meta[name="csrf-token"]')?.content;
        const headers = { 'Content-Type': 'application/json' };
        if (csrfToken) {
            headers['X-CSRFToken'] = csrfToken;
        }

        const response = await fetch(`/benchmark/api/delete/${runId}`, {
            method: 'DELETE',
            headers
        });

        const data = await response.json();

        if (data.success) {
            // Show success message
            showAlert('Benchmark run deleted successfully!', 'success');

            // Remove the run from our local data
            benchmarkRuns = benchmarkRuns.filter(run => run.id !== runId);

            // Reapply filters and redisplay
            applyFilters();
        } else {
            // Fall back to a generic text so the user never sees "undefined"
            showAlert('Error deleting benchmark run: ' + (data.error || 'Unknown error'), 'error');
        }
    } catch (error) {
        console.error('Error deleting benchmark run:', error);
        showAlert('Error deleting benchmark run: ' + error.message, 'error');
    }
}

// Shows a transient toast in the top-right corner of the page.
//
// message - text to display. It is rendered as plain text and never parsed as
//           HTML, because callers pass server error strings and exception
//           messages that must not be able to inject markup.
// type    - 'success', 'error', 'warning' or 'info'. Selects the background
//           colour and the alert-<type> class; other values use the info colour.
//
// The toast removes itself after 5 seconds or when its close button is clicked.
function showAlert(message, type) {
    // Create alert element
    const alertDiv = document.createElement('div');
    alertDiv.className = `alert alert-${type}`;
    alertDiv.style.cssText = `
        position: fixed;
        top: 20px;
        right: 20px;
        z-index: 1000;
        max-width: 400px;
        padding: 15px;
        border-radius: 6px;
        color: white;
        font-weight: 500;
        box-shadow: 0 4px 12px rgba(0,0,0,0.3);
    `;

    // Set background color based on type
    const colors = {
        success: '#4caf50',
        error: '#f44336',
        warning: '#ff9800',
        info: '#2196f3'
    };
    alertDiv.style.backgroundColor = colors[type] || colors.info;

    // Build the content from DOM nodes instead of an HTML string
    const row = document.createElement('div');
    row.style.cssText = 'display: flex; align-items: center; justify-content: space-between;';

    const text = document.createElement('span');
    text.textContent = String(message);

    const closeButton = document.createElement('button');
    closeButton.type = 'button';
    // The visible label is only a multiplication sign, so name the control
    closeButton.setAttribute('aria-label', 'Close');
    closeButton.textContent = '\u00D7';
    closeButton.style.cssText = 'background: none; border: none; color: white; font-size: 1.2rem; cursor: pointer; margin-left: 10px;';
    closeButton.addEventListener('click', () => {
        alertDiv.remove();
    });

    row.appendChild(text);
    row.appendChild(closeButton);
    alertDiv.appendChild(row);
    document.body.appendChild(alertDiv);

    // Auto-remove after 5 seconds (skipped if the user already closed it)
    setTimeout(() => {
        if (alertDiv.parentElement) {
            alertDiv.remove();
        }
    }, 5000);
}
</script>
{% endblock %}
